In [1]:
# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
In [2]:
# Import Dataset
# Import File from Loacal Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import collections
import plotly.express as px
import plotly.graph_objects as go
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk.util import ngrams
from plotly.subplots import make_subplots
from plotly.offline import iplot, init_notebook_mode
from wordcloud import WordCloud, STOPWORDS
from pandas_profiling import ProfileReport
%matplotlib inline
warnings.filterwarnings("ignore")
In [4]:
nltk.download('all')
[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package biocreative_ppi is already up-to-date!
[nltk_data]    | Downloading package brown to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown is already up-to-date!
[nltk_data]    | Downloading package brown_tei to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown_tei is already up-to-date!
[nltk_data]    | Downloading package cess_cat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_cat is already up-to-date!
[nltk_data]    | Downloading package cess_esp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_esp is already up-to-date!
[nltk_data]    | Downloading package chat80 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package chat80 is already up-to-date!
[nltk_data]    | Downloading package city_database to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package city_database is already up-to-date!
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package comparative_sentences to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comparative_sentences is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package comtrans to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comtrans is already up-to-date!
[nltk_data]    | Downloading package conll2000 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2000 is already up-to-date!
[nltk_data]    | Downloading package conll2002 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2002 is already up-to-date!
[nltk_data]    | Downloading package conll2007 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2007 is already up-to-date!
[nltk_data]    | Downloading package crubadan to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package crubadan is already up-to-date!
[nltk_data]    | Downloading package dependency_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dependency_treebank is already up-to-date!
[nltk_data]    | Downloading package dolch to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dolch is already up-to-date!
[nltk_data]    | Downloading package europarl_raw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package europarl_raw is already up-to-date!
[nltk_data]    | Downloading package floresta to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package floresta is already up-to-date!
[nltk_data]    | Downloading package framenet_v15 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v15 is already up-to-date!
[nltk_data]    | Downloading package framenet_v17 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v17 is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package ieer to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ieer is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package indian to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package indian is already up-to-date!
[nltk_data]    | Downloading package jeita to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package jeita is already up-to-date!
[nltk_data]    | Downloading package kimmo to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package kimmo is already up-to-date!
[nltk_data]    | Downloading package knbc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package knbc is already up-to-date!
[nltk_data]    | Downloading package lin_thesaurus to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package lin_thesaurus is already up-to-date!
[nltk_data]    | Downloading package mac_morpho to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mac_morpho is already up-to-date!
[nltk_data]    | Downloading package machado to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package machado is already up-to-date!
[nltk_data]    | Downloading package masc_tagged to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package masc_tagged is already up-to-date!
[nltk_data]    | Downloading package moses_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package moses_sample is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package movie_reviews is already up-to-date!
[nltk_data]    | Downloading package names to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package names is already up-to-date!
[nltk_data]    | Downloading package nombank.1.0 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nombank.1.0 is already up-to-date!
[nltk_data]    | Downloading package nps_chat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nps_chat is already up-to-date!
[nltk_data]    | Downloading package omw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package omw is already up-to-date!
[nltk_data]    | Downloading package opinion_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package opinion_lexicon is already up-to-date!
[nltk_data]    | Downloading package paradigms to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package paradigms is already up-to-date!
[nltk_data]    | Downloading package pil to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pil is already up-to-date!
[nltk_data]    | Downloading package pl196x to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pl196x is already up-to-date!
[nltk_data]    | Downloading package ppattach to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ppattach is already up-to-date!
[nltk_data]    | Downloading package problem_reports to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package problem_reports is already up-to-date!
[nltk_data]    | Downloading package propbank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package propbank is already up-to-date!
[nltk_data]    | Downloading package ptb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ptb is already up-to-date!
[nltk_data]    | Downloading package product_reviews_1 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_1 is already up-to-date!
[nltk_data]    | Downloading package product_reviews_2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_2 is already up-to-date!
[nltk_data]    | Downloading package pros_cons to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pros_cons is already up-to-date!
[nltk_data]    | Downloading package qc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package qc is already up-to-date!
[nltk_data]    | Downloading package reuters to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package reuters is already up-to-date!
[nltk_data]    | Downloading package rte to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rte is already up-to-date!
[nltk_data]    | Downloading package semcor to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package semcor is already up-to-date!
[nltk_data]    | Downloading package senseval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package senseval is already up-to-date!
[nltk_data]    | Downloading package sentiwordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentiwordnet is already up-to-date!
[nltk_data]    | Downloading package sentence_polarity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentence_polarity is already up-to-date!
[nltk_data]    | Downloading package shakespeare to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package shakespeare is already up-to-date!
[nltk_data]    | Downloading package sinica_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sinica_treebank is already up-to-date!
[nltk_data]    | Downloading package smultron to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package smultron is already up-to-date!
[nltk_data]    | Downloading package state_union to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package state_union is already up-to-date!
[nltk_data]    | Downloading package stopwords to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package stopwords is already up-to-date!
[nltk_data]    | Downloading package subjectivity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package subjectivity is already up-to-date!
[nltk_data]    | Downloading package swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package swadesh is already up-to-date!
[nltk_data]    | Downloading package switchboard to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package switchboard is already up-to-date!
[nltk_data]    | Downloading package timit to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package timit is already up-to-date!
[nltk_data]    | Downloading package toolbox to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package toolbox is already up-to-date!
[nltk_data]    | Downloading package treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package treebank is already up-to-date!
[nltk_data]    | Downloading package twitter_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package twitter_samples is already up-to-date!
[nltk_data]    | Downloading package udhr to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr is already up-to-date!
[nltk_data]    | Downloading package udhr2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr2 is already up-to-date!
[nltk_data]    | Downloading package unicode_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package unicode_samples is already up-to-date!
[nltk_data]    | Downloading package universal_treebanks_v20 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_treebanks_v20 is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package verbnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet is already up-to-date!
[nltk_data]    | Downloading package verbnet3 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet3 is already up-to-date!
[nltk_data]    | Downloading package webtext to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package webtext is already up-to-date!
[nltk_data]    | Downloading package wordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet is already up-to-date!
[nltk_data]    | Downloading package wordnet_ic to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet_ic is already up-to-date!
[nltk_data]    | Downloading package words to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package words is already up-to-date!
[nltk_data]    | Downloading package ycoe to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ycoe is already up-to-date!
[nltk_data]    | Downloading package rslp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rslp is already up-to-date!
[nltk_data]    | Downloading package maxent_treebank_pos_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_treebank_pos_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package universal_tagset to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_tagset is already up-to-date!
[nltk_data]    | Downloading package maxent_ne_chunker to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_ne_chunker is already up-to-date!
[nltk_data]    | Downloading package punkt to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package punkt is already up-to-date!
[nltk_data]    | Downloading package book_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package book_grammars is already up-to-date!
[nltk_data]    | Downloading package sample_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sample_grammars is already up-to-date!
[nltk_data]    | Downloading package spanish_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package spanish_grammars is already up-to-date!
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package basque_grammars is already up-to-date!
[nltk_data]    | Downloading package large_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package large_grammars is already up-to-date!
[nltk_data]    | Downloading package tagsets to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package tagsets is already up-to-date!
[nltk_data]    | Downloading package snowball_data to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package snowball_data is already up-to-date!
[nltk_data]    | Downloading package bllip_wsj_no_aux to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package bllip_wsj_no_aux is already up-to-date!
[nltk_data]    | Downloading package word2vec_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package word2vec_sample is already up-to-date!
[nltk_data]    | Downloading package panlex_swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package panlex_swadesh is already up-to-date!
[nltk_data]    | Downloading package mte_teip5 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mte_teip5 is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package perluniprops to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package perluniprops is already up-to-date!
[nltk_data]    | Downloading package nonbreaking_prefixes to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nonbreaking_prefixes is already up-to-date!
[nltk_data]    | Downloading package vader_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package vader_lexicon is already up-to-date!
[nltk_data]    | Downloading package porter_test to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package porter_test is already up-to-date!
[nltk_data]    | Downloading package wmt15_eval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wmt15_eval is already up-to-date!
[nltk_data]    | Downloading package mwa_ppdb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mwa_ppdb is already up-to-date!
[nltk_data]    | 
[nltk_data]  Done downloading collection all
Out[4]:
True
In [5]:
# path = '/content/drive/MyDrive/Files/'

path = 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\'
 
df_movies = pd.read_csv(path + 'ottmovies.csv')
 
df_movies.head()
Out[5]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type
0 1 Inception 2010 13+ 8.8 87% Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148.0 movie NaN 1 0 0 0 0
1 2 The Matrix 1999 16+ 8.7 88% Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136.0 movie NaN 1 0 0 0 0
2 3 Avengers: Infinity War 2018 13+ 8.4 85% Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149.0 movie NaN 1 0 0 0 0
3 4 Back to the Future 1985 7+ 8.5 96% Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116.0 movie NaN 1 0 0 0 0
4 5 The Good, the Bad and the Ugly 1966 16+ 8.8 97% Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161.0 movie NaN 1 0 1 0 0
In [6]:
# profile = ProfileReport(df_movies)
# profile
In [7]:
def data_investigate(df):
    print('No of Rows : ', df.shape[0])
    print('No of Coloums : ', df.shape[1])
    print('**'*25)
    print('Colums Names : \n', df.columns)
    print('**'*25)
    print('Datatype of Columns : \n', df.dtypes)
    print('**'*25)
    print('Missing Values : ')
    c = df.isnull().sum()
    c = c[c > 0]
    print(c)
    print('**'*25)
    print('Missing vaules %age wise :\n')
    print((100*(df.isnull().sum()/len(df.index))))
    print('**'*25)
    print('Pictorial Representation : ')
    plt.figure(figsize = (10, 10))
    sns.heatmap(df.isnull(), yticklabels = False, cbar = False)
    plt.show()
In [8]:
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb               float64
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime            float64
Kind                object
Seasons            float64
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
dtype: object
**************************************************
Missing Values : 
Age                 8457
IMDb                 328
Rotten Tomatoes    10437
Directors            357
Cast                 648
Genres               234
Country              303
Language             437
Plotline            4958
Runtime              382
Seasons            16923
dtype: int64
**************************************************
Missing vaules %age wise :

ID                   0.000000
Title                0.000000
Year                 0.000000
Age                 49.973409
IMDb                 1.938191
Rotten Tomatoes     61.673462
Directors            2.109555
Cast                 3.829108
Genres               1.382734
Country              1.790463
Language             2.582284
Plotline            29.297406
Runtime              2.257283
Kind                 0.000000
Seasons            100.000000
Netflix              0.000000
Hulu                 0.000000
Prime Video          0.000000
Disney+              0.000000
Type                 0.000000
dtype: float64
**************************************************
Pictorial Representation : 
In [9]:
# ID
# df_movies = df_movies.drop(['ID'], axis = 1)
 
# Age
df_movies.loc[df_movies['Age'].isnull() & df_movies['Disney+'] == 1, "Age"] = '13'
# df_movies.fillna({'Age' : 18}, inplace = True)
df_movies.fillna({'Age' : 'NR'}, inplace = True)
df_movies['Age'].replace({'all': '0'}, inplace = True)
df_movies['Age'].replace({'7+': '7'}, inplace = True)
df_movies['Age'].replace({'13+': '13'}, inplace = True)
df_movies['Age'].replace({'16+': '16'}, inplace = True)
df_movies['Age'].replace({'18+': '18'}, inplace = True)
# df_movies['Age'] = df_movies['Age'].astype(int)
 
# IMDb
# df_movies.fillna({'IMDb' : df_movies['IMDb'].mean()}, inplace = True)
# df_movies.fillna({'IMDb' : df_movies['IMDb'].median()}, inplace = True)
df_movies.fillna({'IMDb' : "NA"}, inplace = True)
 
# Rotten Tomatoes
df_movies['Rotten Tomatoes'] = df_movies['Rotten Tomatoes'][df_movies['Rotten Tomatoes'].notnull()].str.replace('%', '').astype(int)
# df_movies['Rotten Tomatoes'] = df_movies['Rotten Tomatoes'][df_movies['Rotten Tomatoes'].notnull()].astype(int)
# df_movies.fillna({'Rotten Tomatoes' : df_movies['Rotten Tomatoes'].mean()}, inplace = True)
# df_movies.fillna({'Rotten Tomatoes' : df_movies['Rotten Tomatoes'].median()}, inplace = True)
# df_movies['Rotten Tomatoes'] = df_movies['Rotten Tomatoes'].astype(int)
df_movies.fillna({'Rotten Tomatoes' : "NA"}, inplace = True)
 
# Directors
# df_movies = df_movies.drop(['Directors'], axis = 1)
df_movies.fillna({'Directors' : "NA"}, inplace = True)
 
# Cast
df_movies.fillna({'Cast' : "NA"}, inplace = True)
 
# Genres
df_movies.fillna({'Genres': "NA"}, inplace = True)
 
# Country
df_movies.fillna({'Country': "NA"}, inplace = True)
 
# Language
df_movies.fillna({'Language': "NA"}, inplace = True)
 
# Plotline
df_movies.fillna({'Plotline': "NA"}, inplace = True)
 
# Runtime
# df_movies.fillna({'Runtime' : df_movies['Runtime'].mean()}, inplace = True)
# df_movies['Runtime'] = df_movies['Runtime'].astype(int)
df_movies.fillna({'Runtime' : "NA"}, inplace = True)
 
# Kind
# df_movies.fillna({'Kind': "NA"}, inplace = True)
 
# Type
# df_movies.fillna({'Type': "NA"}, inplace = True)
# df_movies = df_movies.drop(['Type'], axis = 1)
 
# Seasons
# df_movies.fillna({'Seasons': 1}, inplace = True)
# df_movies.fillna({'Seasons': "NA"}, inplace = True)
df_movies = df_movies.drop(['Seasons'], axis = 1)
# df_movies['Seasons'] = df_movies['Seasons'].astype(int)
# df_movies.fillna({'Seasons' : df_movies['Seasons'].mean()}, inplace = True)
# df_movies['Seasons'] = df_movies['Seasons'].astype(int)
 
# Service Provider
df_movies['Service Provider'] = df_movies.loc[:, ['Netflix', 'Prime Video', 'Disney+', 'Hulu']].idxmax(axis = 1)
# df_movies.drop(['Netflix','Prime Video','Disney+','Hulu'], axis = 1)

# Removing Duplicate and Missing Entries
df_movies.dropna(how = 'any', inplace = True)
df_movies.drop_duplicates(inplace = True)
In [10]:
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
       'Service Provider'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb                object
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime             object
Kind                object
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
Service Provider    object
dtype: object
**************************************************
Missing Values : 
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :

ID                  0.0
Title               0.0
Year                0.0
Age                 0.0
IMDb                0.0
Rotten Tomatoes     0.0
Directors           0.0
Cast                0.0
Genres              0.0
Country             0.0
Language            0.0
Plotline            0.0
Runtime             0.0
Kind                0.0
Netflix             0.0
Hulu                0.0
Prime Video         0.0
Disney+             0.0
Type                0.0
Service Provider    0.0
dtype: float64
**************************************************
Pictorial Representation : 
In [11]:
df_movies.head()
Out[11]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix
1 2 The Matrix 1999 16 8.7 88 Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136 movie 1 0 0 0 0 Netflix
2 3 Avengers: Infinity War 2018 13 8.4 85 Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149 movie 1 0 0 0 0 Netflix
3 4 Back to the Future 1985 7 8.5 96 Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116 movie 1 0 0 0 0 Netflix
4 5 The Good, the Bad and the Ugly 1966 16 8.8 97 Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161 movie 1 0 1 0 0 Netflix
In [12]:
df_movies.describe()
Out[12]:
ID Year Netflix Hulu Prime Video Disney+ Type
count 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.0
mean 8462.000000 2003.211901 0.214915 0.062637 0.727235 0.033150 0.0
std 4885.393638 20.526532 0.410775 0.242315 0.445394 0.179034 0.0
min 1.000000 1901.000000 0.000000 0.000000 0.000000 0.000000 0.0
25% 4231.500000 2001.000000 0.000000 0.000000 0.000000 0.000000 0.0
50% 8462.000000 2012.000000 0.000000 0.000000 1.000000 0.000000 0.0
75% 12692.500000 2016.000000 0.000000 0.000000 1.000000 0.000000 0.0
max 16923.000000 2020.000000 1.000000 1.000000 1.000000 1.000000 0.0
In [13]:
df_movies.corr()
Out[13]:
ID Year Netflix Hulu Prime Video Disney+ Type
ID 1.000000 -0.217816 -0.644470 -0.129926 0.469301 0.263530 NaN
Year -0.217816 1.000000 0.256151 0.101337 -0.255578 -0.047258 NaN
Netflix -0.644470 0.256151 1.000000 -0.118032 -0.745141 -0.089649 NaN
Hulu -0.129926 0.101337 -0.118032 1.000000 -0.284654 -0.039693 NaN
Prime Video 0.469301 -0.255578 -0.745141 -0.284654 1.000000 -0.289008 NaN
Disney+ 0.263530 -0.047258 -0.089649 -0.039693 -0.289008 1.000000 NaN
Type NaN NaN NaN NaN NaN NaN NaN
In [14]:
# df_movies.sort_values('Year', ascending = True)
# df_movies.sort_values('IMDb', ascending = False)
In [15]:
# df_movies.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_ottmovies.csv', index = False)
 
# path = '/content/drive/MyDrive/Files/'
 
# udf_movies = pd.read_csv(path + 'updated_ottmovies.csv')
 
# udf_movies
In [16]:
# df_netflix_movies = df_movies.loc[(df_movies['Netflix'] > 0)]
# df_hulu_movies = df_movies.loc[(df_movies['Hulu'] > 0)]
# df_prime_video_movies = df_movies.loc[(df_movies['Prime Video'] > 0)]
# df_disney_movies = df_movies.loc[(df_movies['Disney+'] > 0)]
In [17]:
df_netflix_only_movies = df_movies[(df_movies['Netflix'] == 1) & (df_movies['Hulu'] == 0) & (df_movies['Prime Video'] == 0 ) & (df_movies['Disney+'] == 0)]
df_hulu_only_movies = df_movies[(df_movies['Netflix'] == 0) & (df_movies['Hulu'] == 1) & (df_movies['Prime Video'] == 0 ) & (df_movies['Disney+'] == 0)]
df_prime_video_only_movies = df_movies[(df_movies['Netflix'] == 0) & (df_movies['Hulu'] == 0) & (df_movies['Prime Video'] == 1 ) & (df_movies['Disney+'] == 0)]
df_disney_only_movies = df_movies[(df_movies['Netflix'] == 0) & (df_movies['Hulu'] == 0) & (df_movies['Prime Video'] == 0 ) & (df_movies['Disney+'] == 1)]
In [18]:
df_movies_imdb = df_movies.copy()
In [19]:
df_movies_imdb.drop(df_movies_imdb.loc[df_movies_imdb['IMDb'] == "NA"].index, inplace = True)
# df_movies_imdb = df_movies_imdb[df_movies_imdb.IMDb != "NA"]
df_movies_imdb['IMDb'] = df_movies_imdb['IMDb'].astype(int)
In [20]:
# Creating distinct dataframes only with the movies present on individual streaming platforms
netflix_imdb_movies = df_movies_imdb.loc[df_movies_imdb['Netflix'] == 1]
hulu_imdb_movies = df_movies_imdb.loc[df_movies_imdb['Hulu'] == 1]
prime_video_imdb_movies = df_movies_imdb.loc[df_movies_imdb['Prime Video'] == 1]
disney_imdb_movies = df_movies_imdb.loc[df_movies_imdb['Disney+'] == 1]
In [21]:
df_movies_imdb_group = df_movies_imdb.copy()
In [22]:
plt.figure(figsize = (10, 10))
corr = df_movies_imdb.corr()
# Plot figsize
fig, ax = plt.subplots(figsize=(10, 8))
# Generate Heat Map, allow annotations and place floats in map
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f")
# Apply xticks
plt.xticks(range(len(corr.columns)), corr.columns);
# Apply yticks
plt.yticks(range(len(corr.columns)), corr.columns)
# show plot
plt.show()
fig.show()
<Figure size 720x720 with 0 Axes>
In [23]:
df_imdb_high_movies = df_movies_imdb.sort_values(by = 'IMDb', ascending = False).reset_index()
df_imdb_high_movies = df_imdb_high_movies.drop(['index'], axis = 1)
# filter = (df_movies_imdb['IMDb'] == (df_movies_imdb['IMDb'].max()))
# df_imdb_high_movies = df_movies_imdb[filter]
 
# highest_rated_movies = df_movies_imdb.loc[df_movies_imdb['IMDb'].idxmax()]
 
print('\nMovies with Highest Ever IMDb  are : \n')
df_imdb_high_movies.head(5)
Movies with Highest Ever IMDb  are : 

Out[23]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 7767 Operation Toussaint: Operation Underground Rai... 2018 NR 9 NA Nick Nanton,Ramy Romany Tim Ballard,Anthony Robbins,Glenn Beck,Orrin H... Documentary United States English A comedy about a cheating husband (Ray Liotta)... 82 movie 0 0 1 0 0 Prime Video
1 7300 Finding Family 2013 13 9 NA Chris Leslie,Oggi Tomic NA Documentary,Family,History,War United Kingdom,Bosnia and Herzegovina Bosnian,English A woman named Grace retires with her two child... 56 movie 0 0 1 0 0 Prime Video
2 13378 Concrete Cowboys 1979 NR 9 79 Ty Javos Ty Javos,Benjamin Davies,Chris Sunberg,Bryce S... Short,Drama Canada English Rachel is 8 and for the first time on Christma... 5 movie 0 0 1 0 0 Prime Video
3 8678 Weaving the Past: Journey of Discovery 2014 NR 9 NA Walter Dominguez NA Documentary United States Spanish,English Sonny "Sundown" Garcia is the top North Americ... 126 movie 0 0 1 0 0 Prime Video
4 16587 Almost Impossible 2017 18 9 NA Andrew Espinoza Long Erica Chase,Andrew Espinoza Long,Richard Randa... Short,Drama United States English NA 5 movie 0 1 0 0 0 Hulu
In [24]:
fig = px.bar(y = df_imdb_high_movies['Title'][:15],
             x = df_imdb_high_movies['IMDb'][:15], 
             color = df_imdb_high_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Highest IMDb : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [25]:
df_imdb_low_movies = df_movies_imdb.sort_values(by = 'IMDb', ascending = True).reset_index()
df_imdb_low_movies = df_imdb_low_movies.drop(['index'], axis = 1)
# filter = (df_movies_imdb['IMDb'] == (df_movies_imdb['IMDb'].min()))
# df_imdb_low_movies = df_movies_imdb[filter]

print('\nMovies with Lowest Ever IMDb  are : \n')
df_imdb_low_movies.head(5)
Movies with Lowest Ever IMDb  are : 

Out[25]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 13433 In Memoriam Alexander Litvinenko 2007 NR 0 NA Jos de Putter NA Documentary Netherlands English The Hellcats are an all-female gang bent on bu... 55 movie 0 0 1 0 0 Prime Video
1 13441 From Philadelphia To Fallujah 2011 NR 0 NA David Hammelburg Harry Lennix Documentary,Short United States English Bee People is not just a documentary. It is an... 42 movie 0 0 1 0 0 Prime Video
2 4238 9/11: 15 years later 2016 NR 0 NA NA Richard Gage,Luke Rudkowski,Coen Vermeeren Documentary Netherlands English NA 60 movie 0 1 0 0 0 Hulu
3 13549 Return of the Boogeyman 1994 NR 1 NA Deland Nuse,Ulli Lommel Kelly Galindo,Suzanna Love,Omar Kaczmarczyk,Ma... Horror United States English NA 76 movie 0 0 1 0 0 Prime Video
4 15286 Curse of Bigfoot 1978 NR 1 NA Dave Flocker Bob Clymire,Jan Swihart,Bill Simonsen,Dennis K... Horror United States English A headstrong animal-rights activist group plan... 88 movie 0 0 1 0 0 Prime Video
In [26]:
fig = px.bar(y = df_imdb_low_movies['Title'][:15],
             x = df_imdb_low_movies['IMDb'][:15], 
             color = df_imdb_low_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Lowest IMDb : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [27]:
print(f'''
      Total '{df_movies_imdb['IMDb'].unique().shape[0]}' unique IMDb s were Given, They were Like this,\n
      
{df_movies_imdb.sort_values(by = 'IMDb', ascending = False)['IMDb'].unique()}\n
 
      The Highest Ever IMDb Ever Any Movie Got is '{df_imdb_high_movies['Title'][0]}' : '{df_imdb_high_movies['IMDb'].max()}'\n
 
      The Lowest Ever IMDb Ever Any Movie Got is '{df_imdb_low_movies['Title'][0]}' : '{df_imdb_low_movies['IMDb'].min()}'\n
      ''')
      Total '10' unique IMDb s were Given, They were Like this,

      
[9 8 7 6 5 4 3 2 1 0]

 
      The Highest Ever IMDb Ever Any Movie Got is 'Operation Toussaint: Operation Underground Railroad and the Fight to End Modern Day Slavery' : '9'

 
      The Lowest Ever IMDb Ever Any Movie Got is 'In Memoriam Alexander Litvinenko' : '0'

      
In [28]:
netflix_imdb_high_movies = df_imdb_high_movies.loc[df_imdb_high_movies['Netflix']==1].reset_index()
netflix_imdb_high_movies = netflix_imdb_high_movies.drop(['index'], axis = 1)
 
netflix_imdb_low_movies = df_imdb_low_movies.loc[df_imdb_low_movies['Netflix']==1].reset_index()
netflix_imdb_low_movies = netflix_imdb_low_movies.drop(['index'], axis = 1)
 
netflix_imdb_high_movies.head(5)
Out[28]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 812 God of War 2017 NR 9 46 Cory Barlog Christopher Judge,Sunny Suljic,Jeremy Davies,D... Action,Adventure,Drama,Fantasy United States English,Norse, Old Kratos, the God of War, has defeated the Gods ... NA movie 1 0 0 0 0 Netflix
1 931 Natsamrat 2016 NR 9 NA Mahesh Manjrekar Nana Patekar,Medha Manjrekar,Mrunmayee Deshpan... Drama,Family India Marathi The film is a tragedy about a veteran theatre ... 166 movie 1 0 0 0 0 Netflix
2 1101 It Takes Two 1995 7 9 8 Josef Fares Joseph Balderrama,Annabelle Dowler,Clare Corbe... Adventure Sweden English Embark on the craziest journey of your life in... 101 movie 1 0 1 0 0 Netflix
3 1026 Bo Burnham: What. 2013 18 8 NA Bo Burnham,Christopher Storer Bo Burnham Comedy,Music United States English NA 60 movie 1 0 0 0 0 Netflix
4 1046 Struggle: The Life and Lost Art of Szukalski 2018 18 8 NA Irek Dobrowolski Stanislav Szukalski,Glenn Bray,Robert Williams... Documentary Poland,United States English In old Betamax footage, the Polish-American ar... 115 movie 1 0 0 0 0 Netflix
In [29]:
fig = px.bar(y = netflix_imdb_high_movies['Title'][:15],
             x = netflix_imdb_high_movies['IMDb'][:15], 
             color = netflix_imdb_high_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Highest IMDb : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [30]:
fig = px.bar(y = netflix_imdb_low_movies['Title'][:15],
             x = netflix_imdb_low_movies['IMDb'][:15], 
             color = netflix_imdb_low_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Lowest IMDb : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [31]:
hulu_imdb_high_movies = df_imdb_high_movies.loc[df_imdb_high_movies['Hulu']==1].reset_index()
hulu_imdb_high_movies = hulu_imdb_high_movies.drop(['index'], axis = 1)
 
hulu_imdb_low_movies = df_imdb_low_movies.loc[df_imdb_low_movies['Hulu']==1].reset_index()
hulu_imdb_low_movies = hulu_imdb_low_movies.drop(['index'], axis = 1)
 
hulu_imdb_high_movies.head(5)
Out[31]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 16587 Almost Impossible 2017 18 9 NA Andrew Espinoza Long Erica Chase,Andrew Espinoza Long,Richard Randa... Short,Drama United States English NA 5 movie 0 1 0 0 0 Hulu
1 16592 The Paley Center 2000 NR 9 NA Brad Lachman Margaret Cho,Raúl Esparza,Kelli Giddish,Marisk... NA United States English NA NA movie 0 1 0 0 0 Hulu
2 3460 The Dark Knight 2008 13 9 87 Christopher Nolan Christian Bale,Heath Ledger,Aaron Eckhart,Mich... Action,Crime,Drama,Thriller United States,United Kingdom English,Mandarin Set within a year after the events of Batman B... 152 movie 0 1 0 0 0 Hulu
3 3490 Apollo 11 2019 0 8 99 Todd Douglas Miller Neil Armstrong,Michael Collins,Buzz Aldrin,Dek... Documentary,History United States English On its fiftieth anniversary, the events surrou... 93 movie 0 1 0 0 0 Hulu
4 3480 Free Solo 2018 13 8 97 Jimmy Chin,Elizabeth Chai Vasarhelyi Alex Honnold,Tommy Caldwell,Jimmy Chin,Cheyne ... Documentary,Adventure,Sport United States English NA 100 movie 0 1 0 1 0 Disney+
In [32]:
fig = px.bar(y = hulu_imdb_high_movies['Title'][:15],
             x = hulu_imdb_high_movies['IMDb'][:15], 
             color = hulu_imdb_high_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Highest IMDb : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [33]:
fig = px.bar(y = hulu_imdb_low_movies['Title'][:15],
             x = hulu_imdb_low_movies['IMDb'][:15], 
             color = hulu_imdb_low_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Lowest IMDb : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [34]:
prime_video_imdb_high_movies = df_imdb_high_movies.loc[df_imdb_high_movies['Prime Video']==1].reset_index()
prime_video_imdb_high_movies = prime_video_imdb_high_movies.drop(['index'], axis = 1)
 
prime_video_imdb_low_movies = df_imdb_low_movies.loc[df_imdb_low_movies['Prime Video']==1].reset_index()
prime_video_imdb_low_movies = prime_video_imdb_low_movies.drop(['index'], axis = 1)
 
prime_video_imdb_high_movies.head(5)
Out[34]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 7767 Operation Toussaint: Operation Underground Rai... 2018 NR 9 NA Nick Nanton,Ramy Romany Tim Ballard,Anthony Robbins,Glenn Beck,Orrin H... Documentary United States English A comedy about a cheating husband (Ray Liotta)... 82 movie 0 0 1 0 0 Prime Video
1 7300 Finding Family 2013 13 9 NA Chris Leslie,Oggi Tomic NA Documentary,Family,History,War United Kingdom,Bosnia and Herzegovina Bosnian,English A woman named Grace retires with her two child... 56 movie 0 0 1 0 0 Prime Video
2 13378 Concrete Cowboys 1979 NR 9 79 Ty Javos Ty Javos,Benjamin Davies,Chris Sunberg,Bryce S... Short,Drama Canada English Rachel is 8 and for the first time on Christma... 5 movie 0 0 1 0 0 Prime Video
3 8678 Weaving the Past: Journey of Discovery 2014 NR 9 NA Walter Dominguez NA Documentary United States Spanish,English Sonny "Sundown" Garcia is the top North Americ... 126 movie 0 0 1 0 0 Prime Video
4 6823 Escape from Firebase Kate 2015 NR 9 NA Paul Kakert J.V. Martin Documentary United States English This is a thriller story about how police work... 60 movie 0 0 1 0 0 Prime Video
In [35]:
fig = px.bar(y = prime_video_imdb_high_movies['Title'][:15],
             x = prime_video_imdb_high_movies['IMDb'][:15], 
             color = prime_video_imdb_high_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Highest IMDb : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [36]:
fig = px.bar(y = prime_video_imdb_low_movies['Title'][:15],
             x = prime_video_imdb_low_movies['IMDb'][:15], 
             color = prime_video_imdb_low_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Lowest IMDb : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [37]:
disney_imdb_high_movies = df_imdb_high_movies.loc[df_imdb_high_movies['Disney+']==1].reset_index()
disney_imdb_high_movies = disney_imdb_high_movies.drop(['index'], axis = 1)
 
disney_imdb_low_movies = df_imdb_low_movies.loc[df_imdb_low_movies['Disney+']==1].reset_index()
disney_imdb_low_movies = disney_imdb_low_movies.drop(['index'], axis = 1)
 
disney_imdb_high_movies.head(5)
Out[37]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 16912 Marvel's Spider-Man 2017 7 9 NA Ryan Smith Yuri Lowenthal,Tara Platt,Travis Willingham,Wi... Action,Adventure,Comedy,Fantasy,Mystery,Sci-Fi United States English Hercules, son of the Greek God, Zeus, is turne... NA movie 0 0 0 1 0 Disney+
1 5254 Empire of Dreams: The Story of the Star Wars T... 2004 13 8 NA Edith Becker,Kevin Burns Robert Clotworthy,Walter Cronkite,George Lucas... Documentary,History,Sci-Fi United States English Nicko and his brother take off from Canada in ... 151 movie 0 0 1 1 0 Prime Video
2 15788 The Straight Story 1999 0 8 95 David Lynch Sissy Spacek,Jane Galloway Heitz,Joseph A. Car... Biography,Drama France,United Kingdom,United States English Four children from the same family have to lea... 112 movie 0 0 0 1 0 Disney+
3 15780 Togo 2019 7 8 92 Ericson Core Willem Dafoe,Julianne Nicholson,Christopher He... Adventure,Biography,Drama,Family,History United States English Jedi Master-in-hiding Luke Skywalker unwilling... 113 movie 0 0 0 1 0 Disney+
4 3480 Free Solo 2018 13 8 97 Jimmy Chin,Elizabeth Chai Vasarhelyi Alex Honnold,Tommy Caldwell,Jimmy Chin,Cheyne ... Documentary,Adventure,Sport United States English NA 100 movie 0 1 0 1 0 Disney+
In [38]:
fig = px.bar(y = disney_imdb_high_movies['Title'][:15],
             x = disney_imdb_high_movies['IMDb'][:15], 
             color = disney_imdb_high_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Highest IMDb : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [39]:
fig = px.bar(y = disney_imdb_low_movies['Title'][:15],
             x = disney_imdb_low_movies['IMDb'][:15], 
             color = disney_imdb_low_movies['IMDb'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Lowest IMDb : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [40]:
print(f'''
      The Movie with Highest IMDb  Ever Got is '{df_imdb_high_movies['Title'][0]}' : '{df_imdb_high_movies['IMDb'].max()}'\n
      The Movie with Lowest IMDb  Ever Got is '{df_imdb_low_movies['Title'][0]}' : '{df_imdb_low_movies['IMDb'].min()}'\n
      
      The Movie with Highest IMDb  on 'Netflix' is '{netflix_imdb_high_movies['Title'][0]}' : '{netflix_imdb_high_movies['IMDb'].max()}'\n
      The Movie with Lowest IMDb  on 'Netflix' is '{netflix_imdb_low_movies['Title'][0]}' : '{netflix_imdb_low_movies['IMDb'].min()}'\n
      
      The Movie with Highest IMDb  on 'Hulu' is '{hulu_imdb_high_movies['Title'][0]}' : '{hulu_imdb_high_movies['IMDb'].max()}'\n
      The Movie with Lowest IMDb  on 'Hulu' is '{hulu_imdb_low_movies['Title'][0]}' : '{hulu_imdb_low_movies['IMDb'].min()}'\n
      
      The Movie with Highest IMDb  on 'Prime Video' is '{prime_video_imdb_high_movies['Title'][0]}' : '{prime_video_imdb_high_movies['IMDb'].max()}'\n
      The Movie with Lowest IMDb  on 'Prime Video' is '{prime_video_imdb_low_movies['Title'][0]}' : '{prime_video_imdb_low_movies['IMDb'].min()}'\n
      
      The Movie with Highest IMDb  on 'Disney+' is '{disney_imdb_high_movies['Title'][0]}' : '{disney_imdb_high_movies['IMDb'].max()}'\n
      The Movie with Lowest IMDb  on 'Disney+' is '{disney_imdb_low_movies['Title'][0]}' : '{disney_imdb_low_movies['IMDb'].min()}'\n 
      ''')
      The Movie with Highest IMDb  Ever Got is 'Operation Toussaint: Operation Underground Railroad and the Fight to End Modern Day Slavery' : '9'

      The Movie with Lowest IMDb  Ever Got is 'In Memoriam Alexander Litvinenko' : '0'

      
      The Movie with Highest IMDb  on 'Netflix' is 'God of War' : '9'

      The Movie with Lowest IMDb  on 'Netflix' is 'Aerials' : '1'

      
      The Movie with Highest IMDb  on 'Hulu' is 'Almost Impossible' : '9'

      The Movie with Lowest IMDb  on 'Hulu' is '9/11: 15 years later' : '0'

      
      The Movie with Highest IMDb  on 'Prime Video' is 'Operation Toussaint: Operation Underground Railroad and the Fight to End Modern Day Slavery' : '9'

      The Movie with Lowest IMDb  on 'Prime Video' is 'In Memoriam Alexander Litvinenko' : '0'

      
      The Movie with Highest IMDb  on 'Disney+' is 'Marvel's Spider-Man' : '9'

      The Movie with Lowest IMDb  on 'Disney+' is 'Hacksaw' : '1'
 
      
In [41]:
print(f'''
      Accross All Platforms the Average IMDb  is '{round(df_movies_imdb['IMDb'].mean(), ndigits = 2)}'\n
      The Average IMDb  on 'Netflix' is '{round(netflix_imdb_movies['IMDb'].mean(), ndigits = 2)}'\n
      The Average IMDb  on 'Hulu' is '{round(hulu_imdb_movies['IMDb'].mean(), ndigits = 2)}'\n
      The Average IMDb  on 'Prime Video' is '{round(prime_video_imdb_movies['IMDb'].mean(), ndigits = 2)}'\n
      The Average IMDb  on 'Disney+' is '{round(disney_imdb_movies['IMDb'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average IMDb  is '5.53'

      The Average IMDb  on 'Netflix' is '5.83'

      The Average IMDb  on 'Hulu' is '5.83'

      The Average IMDb  on 'Prime Video' is '5.41'

      The Average IMDb  on 'Disney+' is '6.0'
 
      
In [42]:
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
sns.distplot(df_movies_imdb['IMDb'],bins = 20, kde = True, ax = ax[0])
sns.boxplot(df_movies_imdb['IMDb'], ax = ax[1])
plt.show()
In [43]:
# Defining plot size and title
plt.figure(figsize = (20, 5))
plt.title('IMDb s Per Platform')
 
# Plotting the information from each dataset into a histogram
sns.histplot(prime_video_imdb_movies['IMDb'][:100], color = 'lightblue', legend = True, kde = True)
sns.histplot(netflix_imdb_movies['IMDb'][:100], color = 'red', legend = True, kde = True)
sns.histplot(hulu_imdb_movies['IMDb'][:100], color = 'lightgreen', legend = True, kde = True)
sns.histplot(disney_imdb_movies['IMDb'][:100], color = 'darkblue', legend = True, kde = True) 
 
# Setting the legend
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [44]:
def round_val(data):
    if str(data) != 'nan':
        return round(data)
In [45]:
df_movies_imdb_group['IMDb Group'] = df_movies_imdb['IMDb'].apply(round_val)
 
imdb_values = df_movies_imdb_group['IMDb Group'].value_counts().sort_index(ascending = False).tolist()
imdb_index = df_movies_imdb_group['IMDb Group'].value_counts().sort_index(ascending = False).index
 
# imdb_values, imdb_index
In [46]:
imdb_group_count = df_movies_imdb_group.groupby('IMDb Group')['Title'].count()
imdb_group_movies = df_movies_imdb_group.groupby('IMDb Group')[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()
imdb_group_data_movies = pd.concat([imdb_group_count, imdb_group_movies], axis = 1).reset_index().rename(columns = {'Title' : 'Movies Count'})
imdb_group_data_movies = imdb_group_data_movies.sort_values(by = 'Movies Count', ascending = False)
In [47]:
# IMDb Group with Movies Counts - All Platforms Combined
imdb_group_data_movies.sort_values(by = 'Movies Count', ascending = False)
Out[47]:
IMDb Group Movies Count Netflix Hulu Prime Video Disney+
6 6 5185 1238 346 3607 225
5 5 3837 823 251 2795 117
7 7 3463 909 268 2290 145
4 4 2015 321 102 1608 34
3 3 982 82 22 893 4
8 8 652 171 49 433 31
2 2 392 33 7 359 1
1 1 44 2 0 40 2
9 9 22 3 3 16 1
0 0 3 0 1 2 0
In [48]:
imdb_group_data_movies.sort_values(by = 'IMDb Group', ascending = False)
Out[48]:
IMDb Group Movies Count Netflix Hulu Prime Video Disney+
9 9 22 3 3 16 1
8 8 652 171 49 433 31
7 7 3463 909 268 2290 145
6 6 5185 1238 346 3607 225
5 5 3837 823 251 2795 117
4 4 2015 321 102 1608 34
3 3 982 82 22 893 4
2 2 392 33 7 359 1
1 1 44 2 0 40 2
0 0 3 0 1 2 0
In [49]:
fig = px.bar(y = imdb_group_data_movies['Movies Count'],
             x = imdb_group_data_movies['IMDb Group'], 
             color = imdb_group_data_movies['IMDb Group'],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies Count', 'x' : 'IMDb : Rating'},
             title  = 'Movies with Group IMDb : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [50]:
fig = px.pie(imdb_group_data_movies[:10],
             names = imdb_group_data_movies['IMDb Group'],
             values = imdb_group_data_movies['Movies Count'],
             color = imdb_group_data_movies['Movies Count'],
             color_discrete_sequence = px.colors.sequential.Teal)

fig.update_traces(textinfo = 'percent+label',
                  title = 'Movies Count based on IMDb Group')
fig.show()
In [51]:
df_imdb_group_high_movies = imdb_group_data_movies.sort_values(by = 'Movies Count', ascending = False).reset_index()
df_imdb_group_high_movies = df_imdb_group_high_movies.drop(['index'], axis = 1)
# filter = (imdb_group_data_movies['Movies Count'] ==  (imdb_group_data_movies['Movies Count'].max()))
# df_imdb_group_high_movies = imdb_group_data_movies[filter]
 
# highest_rated_movies = imdb_group_data_movies.loc[imdb_group_data_movies['Movies Count'].idxmax()]
 
# print('\nIMDb with Highest Ever Movies Count are : All Platforms Combined\n')
df_imdb_group_high_movies.head(5)
Out[51]:
IMDb Group Movies Count Netflix Hulu Prime Video Disney+
0 6 5185 1238 346 3607 225
1 5 3837 823 251 2795 117
2 7 3463 909 268 2290 145
3 4 2015 321 102 1608 34
4 3 982 82 22 893 4
In [52]:
df_imdb_group_low_movies = imdb_group_data_movies.sort_values(by = 'Movies Count', ascending = True).reset_index()
df_imdb_group_low_movies = df_imdb_group_low_movies.drop(['index'], axis = 1)
# filter = (imdb_group_data_movies['Movies Count'] = =  (imdb_group_data_movies['Movies Count'].min()))
# df_imdb_group_low_movies = imdb_group_data_movies[filter]
 
# print('\nIMDb with Lowest Ever Movies Count are : All Platforms Combined\n')
df_imdb_group_low_movies.head(5)
Out[52]:
IMDb Group Movies Count Netflix Hulu Prime Video Disney+
0 0 3 0 1 2 0
1 9 22 3 3 16 1
2 1 44 2 0 40 2
3 2 392 33 7 359 1
4 8 652 171 49 433 31
In [53]:
print(f'''
      Total '{df_movies_imdb['IMDb'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see Movies from Total '{imdb_group_data_movies['IMDb Group'].unique().shape[0]}' IMDb Group, They were Like this, \n
 
      {imdb_group_data_movies.sort_values(by = 'Movies Count', ascending = False)['IMDb Group'].unique()} etc. \n
 
      The IMDb Group with Highest Movies Count have '{imdb_group_data_movies['Movies Count'].max()}' Movies Available is '{df_imdb_group_high_movies['IMDb Group'][0]}', &\n
      The IMDb Group with Lowest Movies Count have '{imdb_group_data_movies['Movies Count'].min()}' Movies Available is '{df_imdb_group_low_movies['IMDb Group'][0]}'
      ''')
      Total '16595' Titles are available on All Platforms, out of which

      You Can Choose to see Movies from Total '10' IMDb Group, They were Like this, 

 
      [6 5 7 4 3 8 2 1 9 0] etc. 

 
      The IMDb Group with Highest Movies Count have '5185' Movies Available is '6', &

      The IMDb Group with Lowest Movies Count have '3' Movies Available is '0'
      
In [54]:
netflix_imdb_group_movies = imdb_group_data_movies[imdb_group_data_movies['Netflix'] !=  0].sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_imdb_group_movies = netflix_imdb_group_movies.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
netflix_imdb_group_high_movies = df_imdb_group_high_movies.sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_imdb_group_high_movies = netflix_imdb_group_high_movies.drop(['index'], axis = 1)
 
netflix_imdb_group_low_movies = df_imdb_group_high_movies.sort_values(by = 'Netflix', ascending = True).reset_index()
netflix_imdb_group_low_movies = netflix_imdb_group_low_movies.drop(['index'], axis = 1)
 
netflix_imdb_group_high_movies.head(5)
Out[54]:
IMDb Group Movies Count Netflix Hulu Prime Video Disney+
0 6 5185 1238 346 3607 225
1 7 3463 909 268 2290 145
2 5 3837 823 251 2795 117
3 4 2015 321 102 1608 34
4 8 652 171 49 433 31
In [55]:
hulu_imdb_group_movies = imdb_group_data_movies[imdb_group_data_movies['Hulu'] !=  0].sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_imdb_group_movies = hulu_imdb_group_movies.drop(['index', 'Netflix', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
hulu_imdb_group_high_movies = df_imdb_group_high_movies.sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_imdb_group_high_movies = hulu_imdb_group_high_movies.drop(['index'], axis = 1)
 
hulu_imdb_group_low_movies = df_imdb_group_high_movies.sort_values(by = 'Hulu', ascending = True).reset_index()
hulu_imdb_group_low_movies = hulu_imdb_group_low_movies.drop(['index'], axis = 1)
 
hulu_imdb_group_high_movies.head(5)
Out[55]:
IMDb Group Movies Count Netflix Hulu Prime Video Disney+
0 6 5185 1238 346 3607 225
1 7 3463 909 268 2290 145
2 5 3837 823 251 2795 117
3 4 2015 321 102 1608 34
4 8 652 171 49 433 31
In [56]:
prime_video_imdb_group_movies = imdb_group_data_movies[imdb_group_data_movies['Prime Video'] !=  0].sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_imdb_group_movies = prime_video_imdb_group_movies.drop(['index', 'Netflix', 'Hulu', 'Disney+', 'Movies Count'], axis = 1)
 
prime_video_imdb_group_high_movies = df_imdb_group_high_movies.sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_imdb_group_high_movies = prime_video_imdb_group_high_movies.drop(['index'], axis = 1)
 
prime_video_imdb_group_low_movies = df_imdb_group_high_movies.sort_values(by = 'Prime Video', ascending = True).reset_index()
prime_video_imdb_group_low_movies = prime_video_imdb_group_low_movies.drop(['index'], axis = 1)
 
prime_video_imdb_group_high_movies.head(5)
Out[56]:
IMDb Group Movies Count Netflix Hulu Prime Video Disney+
0 6 5185 1238 346 3607 225
1 5 3837 823 251 2795 117
2 7 3463 909 268 2290 145
3 4 2015 321 102 1608 34
4 3 982 82 22 893 4
In [57]:
disney_imdb_group_movies = imdb_group_data_movies[imdb_group_data_movies['Disney+'] !=  0].sort_values(by = 'Disney+', ascending = False).reset_index()
disney_imdb_group_movies = disney_imdb_group_movies.drop(['index', 'Netflix', 'Hulu', 'Prime Video', 'Movies Count'], axis = 1)
 
disney_imdb_group_high_movies = df_imdb_group_high_movies.sort_values(by = 'Disney+', ascending = False).reset_index()
disney_imdb_group_high_movies = disney_imdb_group_high_movies.drop(['index'], axis = 1)
 
disney_imdb_group_low_movies = df_imdb_group_high_movies.sort_values(by = 'Disney+', ascending = True).reset_index()
disney_imdb_group_low_movies = disney_imdb_group_low_movies.drop(['index'], axis = 1)
 
disney_imdb_group_high_movies.head(5)
Out[57]:
IMDb Group Movies Count Netflix Hulu Prime Video Disney+
0 6 5185 1238 346 3607 225
1 7 3463 909 268 2290 145
2 5 3837 823 251 2795 117
3 4 2015 321 102 1608 34
4 8 652 171 49 433 31
In [58]:
print(f'''
      The IMDb Group with Highest Movies Count Ever Got is '{df_imdb_group_high_movies['IMDb Group'][0]}' : '{df_imdb_group_high_movies['Movies Count'].max()}'\n
      The IMDb Group with Lowest Movies Count Ever Got is '{df_imdb_group_low_movies['IMDb Group'][0]}' : '{df_imdb_group_low_movies['Movies Count'].min()}'\n
      
      The IMDb Group with Highest Movies Count on 'Netflix' is '{netflix_imdb_group_high_movies['IMDb Group'][0]}' : '{netflix_imdb_group_high_movies['Netflix'].max()}'\n
      The IMDb Group with Lowest Movies Count on 'Netflix' is '{netflix_imdb_group_low_movies['IMDb Group'][0]}' : '{netflix_imdb_group_low_movies['Netflix'].min()}'\n
      
      The IMDb Group with Highest Movies Count on 'Hulu' is '{hulu_imdb_group_high_movies['IMDb Group'][0]}' : '{hulu_imdb_group_high_movies['Hulu'].max()}'\n
      The IMDb Group with Lowest Movies Count on 'Hulu' is '{hulu_imdb_group_low_movies['IMDb Group'][0]}' : '{hulu_imdb_group_low_movies['Hulu'].min()}'\n
      
      The IMDb Group with Highest Movies Count on 'Prime Video' is '{prime_video_imdb_group_high_movies['IMDb Group'][0]}' : '{prime_video_imdb_group_high_movies['Prime Video'].max()}'\n
      The IMDb Group with Lowest Movies Count on 'Prime Video' is '{prime_video_imdb_group_low_movies['IMDb Group'][0]}' : '{prime_video_imdb_group_low_movies['Prime Video'].min()}'\n
      
      The IMDb Group with Highest Movies Count on 'Disney+' is '{disney_imdb_group_high_movies['IMDb Group'][0]}' : '{disney_imdb_group_high_movies['Disney+'].max()}'\n
      The IMDb Group with Lowest Movies Count on 'Disney+' is '{disney_imdb_group_low_movies['IMDb Group'][0]}' : '{disney_imdb_group_low_movies['Disney+'].min()}'\n 
      ''')
      The IMDb Group with Highest Movies Count Ever Got is '6' : '5185'

      The IMDb Group with Lowest Movies Count Ever Got is '0' : '3'

      
      The IMDb Group with Highest Movies Count on 'Netflix' is '6' : '1238'

      The IMDb Group with Lowest Movies Count on 'Netflix' is '0' : '0'

      
      The IMDb Group with Highest Movies Count on 'Hulu' is '6' : '346'

      The IMDb Group with Lowest Movies Count on 'Hulu' is '1' : '0'

      
      The IMDb Group with Highest Movies Count on 'Prime Video' is '6' : '3607'

      The IMDb Group with Lowest Movies Count on 'Prime Video' is '0' : '2'

      
      The IMDb Group with Highest Movies Count on 'Disney+' is '6' : '225'

      The IMDb Group with Lowest Movies Count on 'Disney+' is '0' : '0'
 
      
In [59]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_i_ax1 = sns.barplot(x = netflix_imdb_group_movies['IMDb Group'][:10], y = netflix_imdb_group_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_i_ax2 = sns.barplot(x = hulu_imdb_group_movies['IMDb Group'][:10], y = hulu_imdb_group_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_i_ax3 = sns.barplot(x = prime_video_imdb_group_movies['IMDb Group'][:10], y = prime_video_imdb_group_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_i_ax4 = sns.barplot(x = disney_imdb_group_movies['IMDb Group'][:10], y = disney_imdb_group_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_i_ax1.title.set_text(labels[0])
h_i_ax2.title.set_text(labels[1])
p_i_ax3.title.set_text(labels[2])
d_i_ax4.title.set_text(labels[3])
 
plt.show()
In [60]:
plt.figure(figsize = (20, 5))
sns.lineplot(x = imdb_group_data_movies['IMDb Group'], y = imdb_group_data_movies['Netflix'], color = 'red')
sns.lineplot(x = imdb_group_data_movies['IMDb Group'], y = imdb_group_data_movies['Hulu'], color = 'lightgreen')
sns.lineplot(x = imdb_group_data_movies['IMDb Group'], y = imdb_group_data_movies['Prime Video'], color = 'lightblue')
sns.lineplot(x = imdb_group_data_movies['IMDb Group'], y = imdb_group_data_movies['Disney+'], color = 'darkblue')
plt.xlabel('IMDb Group', fontsize = 15)
plt.ylabel('Movies Count', fontsize = 15)
plt.show()
In [61]:
print(f'''
      Accross All Platforms Total Count of IMDb Group is '{imdb_group_data_movies['IMDb Group'].unique().shape[0]}'\n
      Total Count of IMDb Group on 'Netflix' is '{netflix_imdb_group_movies['IMDb Group'].unique().shape[0]}'\n
      Total Count of IMDb Group on 'Hulu' is '{hulu_imdb_group_movies['IMDb Group'].unique().shape[0]}'\n
      Total Count of IMDb Group on 'Prime Video' is '{prime_video_imdb_group_movies['IMDb Group'].unique().shape[0]}'\n
      Total Count of IMDb Group on 'Disney+' is '{disney_imdb_group_movies['IMDb Group'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of IMDb Group is '10'

      Total Count of IMDb Group on 'Netflix' is '9'

      Total Count of IMDb Group on 'Hulu' is '9'

      Total Count of IMDb Group on 'Prime Video' is '10'

      Total Count of IMDb Group on 'Disney+' is '9'
 
      
In [62]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_i_ax1 = sns.lineplot(y = imdb_group_data_movies['IMDb Group'], x = imdb_group_data_movies['Netflix'], color = 'red', ax = axes[0, 0])
h_i_ax2 = sns.lineplot(y = imdb_group_data_movies['IMDb Group'], x = imdb_group_data_movies['Hulu'], color = 'lightgreen', ax = axes[0, 1])
p_i_ax3 = sns.lineplot(y = imdb_group_data_movies['IMDb Group'], x = imdb_group_data_movies['Prime Video'], color = 'lightblue', ax = axes[1, 0])
d_i_ax4 = sns.lineplot(y = imdb_group_data_movies['IMDb Group'], x = imdb_group_data_movies['Disney+'], color = 'darkblue', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_i_ax1.title.set_text(labels[0])
h_i_ax2.title.set_text(labels[1])
p_i_ax3.title.set_text(labels[2])
d_i_ax4.title.set_text(labels[3])

plt.show()
In [63]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_i_ax1 = sns.barplot(x = imdb_group_data_movies['IMDb Group'][:10], y = imdb_group_data_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_i_ax2 = sns.barplot(x = imdb_group_data_movies['IMDb Group'][:10], y = imdb_group_data_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_i_ax3 = sns.barplot(x = imdb_group_data_movies['IMDb Group'][:10], y = imdb_group_data_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_i_ax4 = sns.barplot(x = imdb_group_data_movies['IMDb Group'][:10], y = imdb_group_data_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_i_ax1.title.set_text(labels[0])
h_i_ax2.title.set_text(labels[1])
p_i_ax3.title.set_text(labels[2])
d_i_ax4.title.set_text(labels[3])
 
plt.show()